Imported and cleaned all kiln data available from 2018-2020. Involved using an algorithm to remove high peaks and valleys, detection of when the “start” of a run was based on setpoint increases and kiln temperature increases. We now have mostly clean plots with a few exceptions.
Assorted lots from each kiln.
all_kilns <- bind_rows(
kilns_AB %>% dplyr::select(time, setpoint, avg_kiln_temp, LOTNO, auc_min, auc_max, kiln),
kilns_C %>% dplyr::select(time, setpoint, avg_kiln_temp, LOTNO, auc_min, auc_max, kiln),
kilns_D %>% dplyr::select(time, setpoint, avg_kiln_temp, LOTNO, auc_min, auc_max, kiln),
kilns_E %>% dplyr::select(time, setpoint, avg_kiln_temp, LOTNO, auc_min, auc_max, kiln),
kilns_F %>% dplyr::select(time, setpoint, avg_kiln_temp, LOTNO, auc_min, auc_max, kiln),
kilns_G %>% dplyr::select(time, setpoint, avg_kiln_temp, LOTNO, auc_min, auc_max, kiln),
kilns_H %>% dplyr::select(time, setpoint, avg_kiln_temp, LOTNO, auc_min, auc_max, kiln)
)
# random sample of LOTNOs
set.seed(505)
n_kilns <- sample_n(all_kilns, 16) %>% dplyr::select(LOTNO) %>% unlist()
n_kilns <- sample_n(all_kilns %>% dplyr::filter(kiln == "A"), 2) %>% dplyr::select(LOTNO) %>% unlist()
n_kilns <- bind_rows(sample_n(all_kilns %>% dplyr::filter(kiln == "B"), 2) %>% dplyr::select(LOTNO) %>% unlist(), n_kilns)
n_kilns <- bind_rows(sample_n(all_kilns %>% dplyr::filter(kiln == "C"), 2) %>% dplyr::select(LOTNO) %>% unlist(), n_kilns)
n_kilns <- bind_rows(sample_n(all_kilns %>% dplyr::filter(kiln == "D"), 2) %>% dplyr::select(LOTNO) %>% unlist(), n_kilns)
n_kilns <- bind_rows(sample_n(all_kilns %>% dplyr::filter(kiln == "E"), 2) %>% dplyr::select(LOTNO) %>% unlist(), n_kilns)
n_kilns <- bind_rows(sample_n(all_kilns %>% dplyr::filter(kiln == "F"), 2) %>% dplyr::select(LOTNO) %>% unlist(), n_kilns)
n_kilns <- bind_rows(sample_n(all_kilns %>% dplyr::filter(kiln == "G"), 2) %>% dplyr::select(LOTNO) %>% unlist(), n_kilns)
n_kilns <- bind_rows(sample_n(all_kilns %>% dplyr::filter(kiln == "H"), 2) %>% dplyr::select(LOTNO) %>% unlist(), n_kilns)
n_kilns <- unlist(n_kilns)
sample_kilns <- all_kilns %>%
dplyr::filter(LOTNO %in% n_kilns) %>%
mutate(LOTNO = as.character(LOTNO)) %>%
mutate(LOTNO = factor(LOTNO)) %>%
mutateAucValues()
plot_range(sample_kilns, lot_compare=T)
# random sample of LOTNOs
set.seed(76)
n_kilns <- sample_n(all_kilns %>% dplyr::filter(kiln == "A"), 16) %>% dplyr::select(LOTNO) %>% unlist()
sample_kilns_a <- all_kilns %>%
dplyr::filter(LOTNO %in% n_kilns) %>%
mutate(LOTNO = as.character(LOTNO)) %>%
mutate(LOTNO = factor(LOTNO)) %>%
mutateAucValues()
plot_range(sample_kilns_a)
# random sample of LOTNOs
set.seed(76)
n_kilns <- sample_n(all_kilns %>% dplyr::filter(kiln == "B"), 16) %>% dplyr::select(LOTNO) %>% unlist()
sample_kilns_b <- all_kilns %>%
dplyr::filter(LOTNO %in% n_kilns) %>%
mutate(LOTNO = as.character(LOTNO)) %>%
mutate(LOTNO = factor(LOTNO)) %>%
mutateAucValues()
plot_range(sample_kilns_b)
# random sample of LOTNOs
set.seed(76)
n_kilns <- sample_n(all_kilns %>% dplyr::filter(kiln == "C"), 16) %>% dplyr::select(LOTNO) %>% unlist()
sample_kilns_c <- all_kilns %>%
dplyr::filter(LOTNO %in% n_kilns) %>%
mutate(LOTNO = as.character(LOTNO)) %>%
mutate(LOTNO = factor(LOTNO)) %>%
mutateAucValues()
plot_range(sample_kilns_c)
# random sample of LOTNOs
set.seed(15)
n_kilns <- sample_n(all_kilns %>% dplyr::filter(kiln == "D"), 16) %>% dplyr::select(LOTNO) %>% unlist()
sample_kilns_d <- all_kilns %>%
dplyr::filter(LOTNO %in% n_kilns) %>%
mutate(LOTNO = as.character(LOTNO)) %>%
mutate(LOTNO = factor(LOTNO)) %>%
mutateAucValues()
plot_range(sample_kilns_d)
# random sample of LOTNOs
set.seed(76)
n_kilns <- sample_n(all_kilns %>% dplyr::filter(kiln == "E"), 16) %>% dplyr::select(LOTNO) %>% unlist()
sample_kilns_e <- all_kilns %>%
dplyr::filter(LOTNO %in% n_kilns) %>%
mutate(LOTNO = as.character(LOTNO)) %>%
mutate(LOTNO = factor(LOTNO)) %>%
mutateAucValues()
plot_range(sample_kilns_e)
# random sample of LOTNOs
set.seed(15)
n_kilns <- sample_n(all_kilns %>% dplyr::filter(kiln == "F"), 16) %>% dplyr::select(LOTNO) %>% unlist()
sample_kilns_f <- all_kilns %>%
dplyr::filter(LOTNO %in% n_kilns) %>%
mutate(LOTNO = as.character(LOTNO)) %>%
mutate(LOTNO = factor(LOTNO)) %>%
mutateAucValues()
plot_range(sample_kilns_f)
# random sample of LOTNOs
set.seed(15)
n_kilns <- sample_n(all_kilns %>% dplyr::filter(kiln == "G"), 16) %>% dplyr::select(LOTNO) %>% unlist()
sample_kilns_g <- all_kilns %>%
dplyr::filter(LOTNO %in% n_kilns) %>%
mutate(LOTNO = as.character(LOTNO)) %>%
mutate(LOTNO = factor(LOTNO)) %>%
mutateAucValues()
plot_range(sample_kilns_g)
# random sample of LOTNOs
set.seed(15)
n_kilns <- sample_n(all_kilns %>% dplyr::filter(kiln == "H"), 16) %>% dplyr::select(LOTNO) %>% unlist()
sample_kilns_h <- all_kilns %>%
dplyr::filter(LOTNO %in% n_kilns) %>%
mutate(LOTNO = as.character(LOTNO)) %>%
mutate(LOTNO = factor(LOTNO)) %>%
mutateAucValues()
plot_range(sample_kilns_h)
One measure mentioned to have potential importance in defect rate is variation between setpoint and average kiln temperature in the 400°C to 600°C range. An algorithm takes the absolute value of the difference between the values, and adds them together to produce a new feature, displayed below.
Base plot of temperature and setpoint over time, with green area representing the claculated area between the two curves. Numeric values also printed for comparison.
plotAucValues(sample_kilns, x.nudge = 900, y.nudge = 0)
plotAucValues(sample_kilns, crop=T, x.nudge = 0, y.nudge = 200)
plotAucValues(sample_kilns, crop=T, free.x=T)
Distribution varies greatly between kilns
Kilns G and H have by far the most consistent operation based on our measure.
df_merged_auc %>%
group_by(LOTNO) %>% slice(1) %>%
mutate(KILN2 = str_replace(KILN, "R", "")) %>%
# ggplot(aes(x=aucDiff, y=fct_reorder(KILN,aucDiff), fill =KILN2))+
ggplot(aes(x=aucDiff, y=fct_reorder(KILN2,aucDiff)))+
geom_boxplot(outlier.alpha = 0,
outlier.shape = 21)+
geom_jitter(height = .2, alpha=.1)+
labs(title = "Setpoint vs temperature variation between kilns")+
xlab("Area between curves")+
ylab("Kiln")+
theme(legend.position = 'none')+
scale_x_continuous(labels = scales::label_number())
Most of the distributions are not distributed normally and potentially require transformation depending on the analysis performed.
df_yields_auc %>%
mutate(KILN2 = str_replace(KILN, "R", "")) %>%
ggplot(aes(x=aucDiff, y = ..count../sum(..count..)))+
geom_density()+
scale_y_continuous(labels = scales::percent_format())+
scale_x_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
facet_wrap(~KILN2, scales='free')
# facet_wrap(~KILN2)
How does our new area between the curves feature relate to yield values?
Starting very broadly, there are very weak correlations between the AUC feature and overall lot yields.
# join correlation of AUC, lot yield to original DF and plot
df <- df_yields_auc %>%
group_by(LOTNO, KILN, aucDiff, temp_avg, precip, snow_fall, snow_depth) %>%
dplyr::summarise(
total_fired = sum(TOTAL_ITEM_FIRED),
total_rejected = sum(TOTAL_ITEM_REJECTED),
pct_lot_yield = (total_fired - total_rejected) / total_fired
) %>%
mutate(KILN2 = str_replace(KILN, "R", ""))
df <- df %>%
group_by(KILN2) %>%
dplyr::summarise(cor = cor(pct_lot_yield, aucDiff)) %>%
left_join(df) %>%
mutate(kiln_cor = factor(paste0(KILN2, " (", round(cor,3), ")")))
df %>%
ggplot(aes(x=pct_lot_yield, y=aucDiff))+
geom_pointdensity(alpha=.8, size=1)+
scale_x_continuous(limits = c(0,1),labels = scales::percent_format())+
scale_y_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
scale_color_viridis_c()+
facet_wrap(~kiln_cor, scales='free_y')+
xlab('Lot yield')+
ylab('Area between curves')+
labs(title = 'AUC versus entire lot yields',
subtitle = 'Correlation value (in parentheses)')+
theme(legend.position = 'none')
df %>% count(cor, KILN2) %>%
arrange(-abs(cor)) %>%
mutate(
cor = cell_spec(round(cor,2), 'html', color= ifelse(cor < 0, 'red', 'black'))
) %>%
set_colnames(c("Correlation", "Kiln", "Observations")) %>%
kable(format = 'html', escape = 'F') %>%
kable_styling('striped',full_width = F)
| Correlation | Kiln | Observations |
|---|---|---|
| 0.27 | E | 19 |
| -0.24 | A | 134 |
| -0.23 | D | 90 |
| -0.18 | C | 55 |
| 0.13 | F | 129 |
| -0.05 | B | 124 |
| 0.03 | G | 288 |
| -0.01 | H | 216 |
Trendline added.
df %>%
ggplot(aes(x=pct_lot_yield, y=aucDiff))+
geom_pointdensity(alpha=.8, size=1)+
geom_smooth(alpha=.1, color = 'red')+
scale_x_continuous(limits = c(0,1),labels = scales::percent_format())+
scale_y_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
scale_color_viridis_c()+
facet_wrap(~kiln_cor, scales='free_y')+
xlab('Lot yield')+
ylab('Area between curves')+
labs(title = 'AUC versus entire lot yields',
subtitle = 'Correlation value (in parentheses)')+
theme(legend.position = 'none')
Cropped the x-axis: 80 - 100%
# join correlation of AUC, lot yield to original DF and plot
df <- df_yields_auc %>%
group_by(LOTNO, KILN, aucDiff, temp_avg, precip, snow_fall, snow_depth) %>%
dplyr::summarise(
total_fired = sum(TOTAL_ITEM_FIRED),
total_rejected = sum(TOTAL_ITEM_REJECTED),
pct_lot_yield = (total_fired - total_rejected) / total_fired
) %>%
mutate(KILN2 = str_replace(KILN, "R", ""))
df <- df %>%
group_by(KILN2) %>%
dplyr::summarise(cor = cor(pct_lot_yield, aucDiff)) %>%
left_join(df) %>%
mutate(kiln_cor = factor(paste0(KILN2, " (", round(cor,3), ")")))
df %>%
ggplot(aes(x=pct_lot_yield, y=aucDiff))+
geom_pointdensity(alpha=.8, size=1)+
scale_x_continuous(limits = c(0.8,1),labels = scales::percent_format())+
scale_y_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
scale_color_viridis_c()+
facet_wrap(~kiln_cor, scales='free_y')+
xlab('Lot yield')+
ylab('Area between curves')+
labs(title = 'AUC versus entire lot yields',
subtitle = 'Correlation value (in parentheses)')+
theme(legend.position = 'none')
Above, we tested the idea that yields of an entire lot could be affected by our AUC feature. Next, we hone in on specific kilns and determine if different items might show different yields within the kiln.
# yields df of kiln
df <- df_yields_auc %>%
mutate(KILN2 = str_replace(KILN, "R", "")) %>%
dplyr::filter(KILN2 == "A")
# get top items fired in kiln
df_items <- df %>%
count(DESCRIPTION) %>%
arrange(-n) %>%
slice(1:9)
# filter original df for top items
df <- df %>%
dplyr::filter(DESCRIPTION %in% df_items$DESCRIPTION)
# get cor values and join to original
# df$DESCRIPTION <- gsub('[[:punct:]]', "", df$DESCRIPTION)
df_cor <- df %>%
group_by(DESCRIPTION) %>%
dplyr::summarise(cor = round(cor(aucDiff, total_item_pct_yield),2)) %>%
left_join(df) %>%
dplyr::select(DESCRIPTION, cor) %>%
group_by(DESCRIPTION) %>% slice(1)
df <- df %>%
left_join(df_cor) %>%
mutate(descr_cor = paste0(DESCRIPTION, " (", cor, ")")) %>%
right_join(df)
# plot
df %>%
ggplot(aes(x=total_item_pct_yield, y=aucDiff))+
# geom_point()+
# geom_bin2d()+
# stat_density_2d(aes(fill=..level..))+
# stat_bin_hex()+
geom_pointdensity()+
scale_x_continuous(limits = c(0,1),labels = scales::percent_format())+
scale_y_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
scale_color_viridis_c()+
xlab('Lot yield')+
ylab('Area between curves')+
labs(title = 'AUC versus item yields')+
# facet_wrap(~descr_cor)+
facet_wrap(~descr_cor, scales='free_y')+
theme(legend.position = 'none')
# table
df %>%
count(cor, DESCRIPTION) %>%
arrange(-abs(cor)) %>%
mutate(
cor = cell_spec(round(cor,2), 'html', color= ifelse(cor < 0, 'red', 'black'))
) %>%
set_colnames(c("Correlation", "Description", "Observations")) %>%
kable(format = 'html', escape = 'F') %>%
kable_styling('striped',full_width = F)
| Correlation | Description | Observations |
|---|---|---|
| 0.42 | 10“ODX1.75”,10PPI,CB | 10 |
| -0.28 | 5.19“TX4.13”BX1",10PPI,CB | 31 |
| -0.17 | 4“X4”X1",10PPI,CB | 10 |
| 0.15 | 6.1“TX4.7”BX1.25",10PPI,CB | 27 |
| 0.14 | 8“ODX1.5”,10PPI,CB | 11 |
| 0.12 | 5“ODX1.25”,10PPI,CB | 17 |
| -0.1 | 5.125“X5.125”X1.25",10PPI,CB | 18 |
| 0.08 | 7.5“X7.5”X.400",65PPI,CORD | 10 |
| -0.06 | 2“X2”X.75",10PPI,CB | 21 |
# yields df of kiln
df <- df_yields_auc %>%
mutate(KILN2 = str_replace(KILN, "R", "")) %>%
dplyr::filter(KILN2 == "B")
# get top items fired in kiln
df_items <- df %>%
count(DESCRIPTION) %>%
arrange(-n) %>%
slice(1:9)
# filter original df for top items
df <- df %>%
dplyr::filter(DESCRIPTION %in% df_items$DESCRIPTION)
# get cor values and join to original
# df$DESCRIPTION <- gsub('[[:punct:]]', "", df$DESCRIPTION)
df_cor <- df %>%
group_by(DESCRIPTION) %>%
dplyr::summarise(cor = round(cor(aucDiff, total_item_pct_yield),2)) %>%
left_join(df) %>%
dplyr::select(DESCRIPTION, cor) %>%
group_by(DESCRIPTION) %>% slice(1)
df <- df %>%
left_join(df_cor) %>%
mutate(descr_cor = paste0(DESCRIPTION, " (", cor, ")")) %>%
right_join(df)
# plot
df %>%
ggplot(aes(x=total_item_pct_yield, y=aucDiff))+
# geom_point()+
# geom_bin2d()+
# stat_density_2d(aes(fill=..level..))+
# stat_bin_hex()+
geom_pointdensity()+
scale_x_continuous(limits = c(0,1),labels = scales::percent_format())+
scale_y_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
scale_color_viridis_c()+
xlab('Lot yield')+
ylab('Area between curves')+
labs(title = 'AUC versus item yields')+
# facet_wrap(~descr_cor)+
facet_wrap(~descr_cor, scales='free_y')+
theme(legend.position = 'none')
# table
df %>%
count(cor, DESCRIPTION) %>%
arrange(-abs(cor)) %>%
mutate(
cor = cell_spec(round(cor,2), 'html', color= ifelse(cor < 0, 'red', 'black'))
) %>%
set_colnames(c("Correlation", "Description", "Observations")) %>%
kable(format = 'html', escape = 'F') %>%
kable_styling('striped',full_width = F)
| Correlation | Description | Observations |
|---|---|---|
| 0.33 | 3“X3”X1",10PPI,CB | 22 |
| 0.24 | 5.125“X5.125”X1.25",10PPI,CB | 22 |
| -0.23 | 4“X4”X1",10PPI,CB | 13 |
| 0.22 | 5“ODX1.25”,10PPI,CB | 11 |
| -0.15 | 2“X2”X.75",10PPI,CB | 16 |
| 0.14 | 5.19“TX4.13”BX1",10PPI,CB | 31 |
| 0.09 | 6.1“TX4.7”BX1.25",10PPI,CB | 27 |
| -0.06 | 3“ODX1”,MO10PPI,CB | 12 |
| 0.03 | UDICELL 125X125X30 10PPI CB | 13 |
# yields df of kiln
df <- df_yields_auc %>%
mutate(KILN2 = str_replace(KILN, "R", "")) %>%
dplyr::filter(KILN2 == "C")
# get top items fired in kiln
df_items <- df %>%
count(DESCRIPTION) %>%
arrange(-n) %>%
slice(1:9)
# filter original df for top items
df <- df %>%
dplyr::filter(DESCRIPTION %in% df_items$DESCRIPTION)
# get cor values and join to original
# df$DESCRIPTION <- gsub('[[:punct:]]', "", df$DESCRIPTION)
df_cor <- df %>%
group_by(DESCRIPTION) %>%
dplyr::summarise(cor = round(cor(aucDiff, total_item_pct_yield),2)) %>%
left_join(df) %>%
dplyr::select(DESCRIPTION, cor) %>%
group_by(DESCRIPTION) %>% slice(1)
df <- df %>%
left_join(df_cor) %>%
mutate(descr_cor = paste0(DESCRIPTION, " (", cor, ")")) %>%
right_join(df)
# plot
df %>%
ggplot(aes(x=total_item_pct_yield, y=aucDiff))+
# geom_point()+
# geom_bin2d()+
# stat_density_2d(aes(fill=..level..))+
# stat_bin_hex()+
geom_pointdensity()+
scale_x_continuous(limits = c(0,1),labels = scales::percent_format())+
scale_y_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
scale_color_viridis_c()+
xlab('Lot yield')+
ylab('Area between curves')+
labs(title = 'AUC versus item yields')+
# facet_wrap(~descr_cor)+
facet_wrap(~descr_cor, scales='free_y')+
theme(legend.position = 'none')
# table
df %>%
count(cor, DESCRIPTION) %>%
arrange(-abs(cor)) %>%
mutate(
cor = cell_spec(round(cor,2), 'html', color= ifelse(cor < 0, 'red', 'black'))
) %>%
set_colnames(c("Correlation", "Description", "Observations")) %>%
kable(format = 'html', escape = 'F') %>%
kable_styling('striped',full_width = F)
| Correlation | Description | Observations |
|---|---|---|
| -0.82 | 4“X4”X1"-IC,10PPI,PSZT | 7 |
| -0.75 | 3“X3”X1"-IC,10PPI,PSZT | 7 |
| -0.62 | 1.5“ODX.8”,45PPI,PSZT,RBFG | 10 |
| -0.56 | UDICELL 150X150X30 10PPI PSZT | 8 |
| 0.44 | 2.6“X2.6”X.75",10PPI,PSZT | 7 |
| 0.37 | 4“X4”X1",10PPI,PSZT | 6 |
| -0.25 | UDICELL DIA 70X25 10PPI PSZT | 7 |
| 0.17 | 2.75“ODX1.25”IDX.625",10PPI,PSZT | 8 |
| 0.14 | 4“ODX1”,10PPI,PSZT | 9 |
# yields df of kiln
df <- df_yields_auc %>%
mutate(KILN2 = str_replace(KILN, "R", "")) %>%
dplyr::filter(KILN2 == "D")
# get top items fired in kiln
df_items <- df %>%
count(DESCRIPTION) %>%
arrange(-n) %>%
slice(1:9)
# filter original df for top items
df <- df %>%
dplyr::filter(DESCRIPTION %in% df_items$DESCRIPTION)
# get cor values and join to original
# df$DESCRIPTION <- gsub('[[:punct:]]', "", df$DESCRIPTION)
df_cor <- df %>%
group_by(DESCRIPTION) %>%
dplyr::summarise(cor = round(cor(aucDiff, total_item_pct_yield),2)) %>%
left_join(df) %>%
dplyr::select(DESCRIPTION, cor) %>%
group_by(DESCRIPTION) %>% slice(1)
df <- df %>%
left_join(df_cor) %>%
mutate(descr_cor = paste0(DESCRIPTION, " (", cor, ")")) %>%
right_join(df)
# plot
df %>%
ggplot(aes(x=total_item_pct_yield, y=aucDiff))+
# geom_point()+
# geom_bin2d()+
# stat_density_2d(aes(fill=..level..))+
# stat_bin_hex()+
geom_pointdensity()+
scale_x_continuous(limits = c(0,1),labels = scales::percent_format())+
scale_y_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
scale_color_viridis_c()+
xlab('Lot yield')+
ylab('Area between curves')+
labs(title = 'AUC versus item yields')+
# facet_wrap(~descr_cor)+
facet_wrap(~descr_cor, scales='free_y')+
theme(legend.position = 'none')
# table
df %>%
count(cor, DESCRIPTION) %>%
arrange(-abs(cor)) %>%
mutate(
cor = cell_spec(round(cor,2), 'html', color= ifelse(cor < 0, 'red', 'black'))
) %>%
set_colnames(c("Correlation", "Description", "Observations")) %>%
kable(format = 'html', escape = 'F') %>%
kable_styling('striped',full_width = F)
| Correlation | Description | Observations |
|---|---|---|
| -0.27 | 3“X4”X.875",15PPI,AL92,SEC | 26 |
| -0.25 | SQUARE,<5PPI,AL92 | 19 |
| 0.25 | SMALLTOMB-1",<5PPI,AL92 | 16 |
| 0.24 | 10“TODX1.5”,50APPI,ALLT,LAM | 53 |
| 0.23 | DWG.C039-ARC,MO20PPI,AL92 | 15 |
| -0.1 | SQUARE,30PPI,AL92 | 36 |
| 0.09 | SMALLARC-1",<5PPI,AL92 | 18 |
| 0.04 | 495MMX422MMX57MM-DWG,30APPI,OBSIC | 15 |
| 0.03 | SQUARE,45PPI,AL92 | 15 |
# yields df of kiln
df <- df_yields_auc %>%
mutate(KILN2 = str_replace(KILN, "R", "")) %>%
dplyr::filter(KILN2 == "E")
# get top items fired in kiln
df_items <- df %>%
count(DESCRIPTION) %>%
arrange(-n) %>%
slice(1:9)
# filter original df for top items
df <- df %>%
dplyr::filter(DESCRIPTION %in% df_items$DESCRIPTION)
# get cor values and join to original
# df$DESCRIPTION <- gsub('[[:punct:]]', "", df$DESCRIPTION)
df_cor <- df %>%
group_by(DESCRIPTION) %>%
dplyr::summarise(cor = round(cor(aucDiff, total_item_pct_yield),2)) %>%
left_join(df) %>%
dplyr::select(DESCRIPTION, cor) %>%
group_by(DESCRIPTION) %>% slice(1)
df <- df %>%
left_join(df_cor) %>%
mutate(descr_cor = paste0(DESCRIPTION, " (", cor, ")")) %>%
right_join(df)
# plot
df %>%
ggplot(aes(x=total_item_pct_yield, y=aucDiff))+
# geom_point()+
# geom_bin2d()+
# stat_density_2d(aes(fill=..level..))+
# stat_bin_hex()+
geom_pointdensity()+
scale_x_continuous(limits = c(0,1),labels = scales::percent_format())+
scale_y_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
scale_color_viridis_c()+
xlab('Lot yield')+
ylab('Area between curves')+
labs(title = 'AUC versus item yields')+
# facet_wrap(~descr_cor)+
facet_wrap(~descr_cor, scales='free_y')+
theme(legend.position = 'none')
# table
df %>%
count(cor, DESCRIPTION) %>%
arrange(-abs(cor)) %>%
mutate(
cor = cell_spec(round(cor,2), 'html', color= ifelse(cor < 0, 'red', 'black'))
) %>%
set_colnames(c("Correlation", "Description", "Observations")) %>%
kable(format = 'html', escape = 'F') %>%
kable_styling('striped',full_width = F)
| Correlation | Description | Observations |
|---|---|---|
| -1 | 145MMTODX122MMBODX38MM,50APPI,ALLT,SAF | 2 |
| -1 | 2“X4”X.875",10PPI,AL92 | 2 |
| -1 | 3“X4”X.875",20PPI,AL92,SEC | 2 |
| -1 | 3“X4”X1.75",15/20PPI,AL92,SEC | 2 |
| 1 | 4“ODX1”,10PPI,PSZM | 2 |
| 0.97 | 495MX422MX57M-DWG,45APPI,OBSIC | 3 |
| 0.94 | 495MMX422MMX57MM-DWG,30APPI,OBSIC | 4 |
| 0.65 | 75MMX75MMX22MM,20PPI,AL92 | 3 |
| 0.43 | 3“X4”X.875",15PPI,AL92,SEC | 5 |
# yields df of kiln
df <- df_yields_auc %>%
mutate(KILN2 = str_replace(KILN, "R", "")) %>%
dplyr::filter(KILN2 == "F")
# get top items fired in kiln
df_items <- df %>%
count(DESCRIPTION) %>%
arrange(-n) %>%
slice(1:9)
# filter original df for top items
df <- df %>%
dplyr::filter(DESCRIPTION %in% df_items$DESCRIPTION)
# get cor values and join to original
# df$DESCRIPTION <- gsub('[[:punct:]]', "", df$DESCRIPTION)
df_cor <- df %>%
group_by(DESCRIPTION) %>%
dplyr::summarise(cor = round(cor(aucDiff, total_item_pct_yield),2)) %>%
left_join(df) %>%
dplyr::select(DESCRIPTION, cor) %>%
group_by(DESCRIPTION) %>% slice(1)
df <- df %>%
left_join(df_cor) %>%
mutate(descr_cor = paste0(DESCRIPTION, " (", cor, ")")) %>%
right_join(df)
# plot
df %>%
ggplot(aes(x=total_item_pct_yield, y=aucDiff))+
# geom_point()+
# geom_bin2d()+
# stat_density_2d(aes(fill=..level..))+
# stat_bin_hex()+
geom_pointdensity()+
scale_x_continuous(limits = c(0,1),labels = scales::percent_format())+
scale_y_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
scale_color_viridis_c()+
xlab('Lot yield')+
ylab('Area between curves')+
labs(title = 'AUC versus item yields')+
# facet_wrap(~descr_cor)+
facet_wrap(~descr_cor, scales='free_y')+
theme(legend.position = 'none')
# table
df %>%
count(cor, DESCRIPTION) %>%
arrange(-abs(cor)) %>%
mutate(
cor = cell_spec(round(cor,2), 'html', color= ifelse(cor < 0, 'red', 'black'))
) %>%
set_colnames(c("Correlation", "Description", "Observations")) %>%
kable(format = 'html', escape = 'F') %>%
kable_styling('striped',full_width = F)
| Correlation | Description | Observations |
|---|---|---|
| 0.6 | 2.75“ODX1.25”IDX.625",10PPI,PSZT | 18 |
| 0.59 | 3“ODX1”,15PPI,PSZT,1/8"FG | 14 |
| 0.35 | UDICELL 150X150X30 10PPI PSZT | 19 |
| 0.25 | 7“ODX1.25”,MO10PPI,PSZM | 19 |
| 0.24 | UDICELL DIA 150X30 10PPI PSZT | 15 |
| 0.1 | UDICELL 125X125X30 10PPI PSZT | 24 |
| -0.09 | 4“ODX1”,10PPI,PSZT | 21 |
| -0.04 | 4“X4”X1"-MD,10PPI,PSZT | 20 |
| -0.03 | 3“X4”X.875",15PPI,AL92,SEC | 15 |
# yields df of kiln
df <- df_yields_auc %>%
mutate(KILN2 = str_replace(KILN, "R", "")) %>%
dplyr::filter(KILN2 == "G")
# get top items fired in kiln
df_items <- df %>%
count(DESCRIPTION) %>%
arrange(-n) %>%
slice(1:9)
# filter original df for top items
df <- df %>%
dplyr::filter(DESCRIPTION %in% df_items$DESCRIPTION)
# get cor values and join to original
# df$DESCRIPTION <- gsub('[[:punct:]]', "", df$DESCRIPTION)
df_cor <- df %>%
group_by(DESCRIPTION) %>%
dplyr::summarise(cor = round(cor(aucDiff, total_item_pct_yield),2)) %>%
left_join(df) %>%
dplyr::select(DESCRIPTION, cor) %>%
group_by(DESCRIPTION) %>% slice(1)
df <- df %>%
left_join(df_cor) %>%
mutate(descr_cor = paste0(DESCRIPTION, " (", cor, ")")) %>%
right_join(df)
# plot
df %>%
ggplot(aes(x=total_item_pct_yield, y=aucDiff))+
# geom_point()+
# geom_bin2d()+
# stat_density_2d(aes(fill=..level..))+
# stat_bin_hex()+
geom_pointdensity()+
scale_x_continuous(limits = c(0,1),labels = scales::percent_format())+
scale_y_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
scale_color_viridis_c()+
xlab('Lot yield')+
ylab('Area between curves')+
labs(title = 'AUC versus item yields')+
# facet_wrap(~descr_cor)+
facet_wrap(~descr_cor, scales='free_y')+
theme(legend.position = 'none')
# table
df %>%
count(cor, DESCRIPTION) %>%
arrange(-abs(cor)) %>%
mutate(
cor = cell_spec(round(cor,2), 'html', color= ifelse(cor < 0, 'red', 'black'))
) %>%
set_colnames(c("Correlation", "Description", "Observations")) %>%
kable(format = 'html', escape = 'F') %>%
kable_styling('striped',full_width = F)
| Correlation | Description | Observations |
|---|---|---|
| -0.38 | 44MMODX26MMIDX15MM,45PPI,AL99,FC | 62 |
| -0.18 | 4.5“ODX2.5”IDX1.375",20PPI,PSZM | 40 |
| 0.18 | 7“X7”X1.25",12.5MMC,PSZM | 38 |
| 0.14 | 7.33“TX6”BX1.25",10MMC,PSZM | 40 |
| -0.11 | 10“TODX2.5”-3"STRIP,10/20PPI,PSZM,FEC | 58 |
| -0.06 | 2“ODX.5”,30PPI,PSZM,SEC | 64 |
| -0.02 | 1.5“ODX.5”,30PPI,PSZM,SEC | 46 |
| 0.02 | 6“X6”X1.25"-SP,12.5MMC,PSZM | 41 |
| 0 | 1.5“ODX.8”,45PPI,PSZT,RBFG | 84 |
# yields df of kiln
df <- df_yields_auc %>%
mutate(KILN2 = str_replace(KILN, "R", "")) %>%
dplyr::filter(KILN2 == "H")
# get top items fired in kiln
df_items <- df %>%
count(DESCRIPTION) %>%
arrange(-n) %>%
slice(1:9)
# filter original df for top items
df <- df %>%
dplyr::filter(DESCRIPTION %in% df_items$DESCRIPTION)
# get cor values and join to original
# df$DESCRIPTION <- gsub('[[:punct:]]', "", df$DESCRIPTION)
df_cor <- df %>%
group_by(DESCRIPTION) %>%
dplyr::summarise(cor = round(cor(aucDiff, total_item_pct_yield),2)) %>%
left_join(df) %>%
dplyr::select(DESCRIPTION, cor) %>%
group_by(DESCRIPTION) %>% slice(1)
df <- df %>%
left_join(df_cor) %>%
mutate(descr_cor = paste0(DESCRIPTION, " (", cor, ")")) %>%
right_join(df)
# plot
dfg <- df %>%
ggplot(aes(x=total_item_pct_yield, y=aucDiff))+
# geom_point()+
# geom_bin2d()+
# stat_density_2d(aes(fill=..level..))+
# stat_bin_hex()+
geom_pointdensity()+
scale_x_continuous(limits = c(0,1),labels = scales::percent_format())+
scale_y_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
scale_color_viridis_c()+
xlab('Lot yield')+
ylab('Area between curves')+
labs(title = 'AUC versus item yields')+
# facet_wrap(~descr_cor)+
facet_wrap(~descr_cor, scales='free_y')+
theme(legend.position = 'none')
ggplotly(dfg)
# table
df %>%
count(cor, DESCRIPTION) %>%
arrange(-abs(cor)) %>%
mutate(
cor = cell_spec(round(cor,2), 'html', color= ifelse(cor < 0, 'red', 'black'))
) %>%
set_colnames(c("Correlation", "Description", "Observations")) %>%
kable(format = 'html', escape = 'F') %>%
kable_styling('striped',full_width = F)
| Correlation | Description | Observations |
|---|---|---|
| -0.35 | 1“ODX.5”,45PPI,AL99,SEC | 22 |
| 0.3 | .75“ODX.5”,45PPI,AL99,SEC | 24 |
| 0.18 | 3“X4”X.875",15PPI,AL92,SEC | 24 |
| 0.15 | DWGC097-REVA-TOMB,30PPI,AL99 | 27 |
| -0.14 | .91“ODX.5”-C259B,30PPI,AL99,SEC,“H4” | 75 |
| -0.13 | .91“ODX.5”-C259B,15PPI,AL99,SEC,“H6” | 54 |
| -0.11 | 150MMODX90MMIDX200MM,10PPI,PSZM,FEC | 72 |
| 0.1 | 150MMODX90MMIDX125MM,10PPI,PSZM,FEC | 39 |
| 0.03 | .75“ODX.5”,30PPI,AL99,SEC | 125 |
Does AUC effect the occurence of cracked webs of an item, on a per lot